# Load the packages used by this pipeline through pacman::p_load().
# Trailing comments are the author's notes on why each package is needed.
pacman::p_load(fuzzyjoin,#fuzzy string joins
               stringi,#string distance functions
               stats,#statistics package
               #dplyr,#for data manipulation (disabled here; dplyr is listed further down)
               stringdist,# NOTE(review): no purpose recorded for this package
               ggplot2,#plots
               igraph,#network graphs - used for defining markets
               ggmap,#using google maps and plotting things
               maps,#also for mapping
               googleway,#compute driving distances
               GGally,
               network,
               sna,ggnetwork,intergraph,ggraph,segmented,revgeo,xtable,stringr,gtools,tm,stargazer,
               fastDummies, #allows me to create dummy variables easily
               AER,#ivreg
               scales,
               stargazer,# NOTE(review): stargazer also appears on the long line above
               readxl,
               tidyr,#data manip
               dplyr,
               parallel)# NOTE(review): the remark here used to read "allows better ggplot scales"; presumably meant for `scales` above

# Project directory layout.
# `wd` is the project root; every other path is built by appending a fixed
# sub-path to it. The working directory is then set to the root.
wd <- "C:/Users/MUNTEANU_A/Dropbox/Research/2018 JMP/"

# Data folders
wd_data_raw          <- paste0(wd, "data/raw/")
wd_data_raw_other    <- paste0(wd, "data/raw/other/")
wd_data_intermediate <- paste0(wd, "data/intermediate/")
wd_data_final        <- paste0(wd, "data/final/")

# Folder holding the pipeline scripts that are parsed and run below
# (note: no trailing slash, unlike the data folders)
wd_code <- paste0(wd, "code/raw to final data")

setwd(wd)


# key<- #your Google API key for geocoding 
# set_key(key = key)
# google_keys()
# Year ranges passed to the cleaning, code-assignment and address-lookup calls
# further down (written newest year first).
# NOTE(review): "bac" / "adm" presumably stand for the HS graduation and
# HS entrance records respectively - confirm.
years_bac <- 2019:2018
years_adm <- 2015:2014

############ Import and Clean Data #################

########## Load, clean, merge and save 2018 data - this was done before the
########## 2008-2017 data was obtained

### Fix files that have issues:
# Pattern used for every step: move to the code directory, parse the script
# as UTF-8 and evaluate it here (expected to define the function that is
# called on the next line), then call that function.

# 2008 bac is missing many Bucharest students
setwd(wd_code)
eval(parse(
  "clean__pre_2018__2008_bac_missing_students.R",
  encoding = "UTF-8"
))
clean__pre_2018__2008_bac_missing_students()

# 2010 adm is missing HS for all students
setwd(wd_code)
eval(parse(
  "clean__pre_2018__2010_adm_harmonize.R",
  encoding = "UTF-8"
))
clean__pre_2018__2010_adm_harmonize()


# CLEAN
setwd(wd_code)
eval(parse("clean__main.R", encoding = "UTF-8"))
clean__main(years_bac, years_adm)

# Add unique codes - admissions
# Each wrapper script lives in its own sub-folder of the code directory, so
# the working directory is reset to `wd_code` before descending into it.
# Alternative admissions year range, currently disabled:
# years_adm<-2017:2004

# HS
setwd(wd_code)
setwd("./Codes/Adm HS SIIIR Codes/")
eval(parse("add_codes_adm_HS_wrapper.R", encoding = "UTF-8"))
add_codes_adm_HS_wrapper(years_adm)

# MS
setwd(wd_code)
setwd("./Codes/Adm MS SIIIR Codes/")
eval(parse("add_codes_adm_MS_wrapper.R", encoding = "UTF-8"))
add_codes_adm_MS_wrapper(years_adm)




# Get address
# Author's note: this has been done! Do not need to run this again. Simply use
# the locations already saved.
# NOTE(review): despite the note above, the lookup call below is still active
# and runs every time the script is executed - confirm whether it should be
# commented out or guarded.
setwd(wd_code)
eval(parse("get_addresses_v2.R", encoding = "UTF-8"))
clean_get_addresses(years_bac, years_adm)

# Add addresses and GPS coordinates
# The year ranges are widened here; later steps that take `years_bac` or
# `years_adm` see these values unless they are reassigned.
years_bac <- 2019:2008
years_adm <- 2017:2004
setwd(wd_code)
eval(parse("clean_add_addresses_main.R", encoding = "UTF-8"))
clean_add_addresses_main(years_bac, years_adm)

# Merge hs grad and hs entrance data
# (years with both types of records)
# Uses the current value of `years_bac`; the explicit range is kept below,
# disabled:
# years_bac<-2019:2008
# NOTE(review): merge.R presumably defines a global `merge()`, which would
# mask base::merge for the rest of the session - confirm.
setwd(wd_code)
eval(parse("merge.R", encoding = "UTF-8"))
merge(years_bac)

# Get towns
# MS
# NOTE(review): the label says MS but the function called refers to "hs" -
# confirm which is meant.
# NOTE(review): this step and the next load their scripts with source() and no
# encoding, whereas every other step uses parse(..., encoding = 'UTF-8') -
# confirm these files contain no non-ASCII text or are saved accordingly.
setwd(wd_code)
years <- 2019:2018
source("clean__get_town_hs__main_v2.R")
clean__get_town_hs__main(years)

# HARMONIZE HS NAMES
# MS
setwd(wd_code)
years <- 2019:2018
source("clean__harmonize_hs_names_v5.R")
clean__harmonize_hs_names(years)

# Add SIIIR codes
# Uses the current value of `years_bac`; the explicit range is kept below,
# disabled:
# years_bac<-2019:2008
setwd(wd_code)
setwd("./Codes/Bac HS SIIIR Codes/")
eval(parse("add_codes_bac_wrapper.R", encoding = "UTF-8"))
add_codes_bac_wrapper(years_bac)


#################### add years with only adm records
# NOTE(review): the range below is disabled, so merge_no_bac() receives
# whatever `years_bac` currently holds - confirm that is intended for the
# adm-only years.
# years_bac<-2021:2020
setwd(wd_code)
eval(parse("merge_no_bac.R", encoding = "UTF-8"))
merge_no_bac(years_bac)

# #Fix HS that are unmatched
# years<-c(2019:2008)
# source('clean__fix__hs_matches.R')
# clean__fix__hs_matches(years)


########### Get harmonized HS list
# This will be used in the merge function to create harmonized HS names and IDs.


#data<-merge_bac_data(data_adm_raw,data_bac_raw)
#data_merged_complete<-data[[1]]
#data_merged_filtered<-data[[2]]

############## Harmonize HS names
